import re
import requests

# First page of the movie list; the crawl starts here.
url = 'https://www.dytt.to/html/gndy/dyzz/list_23_1.html'

# HTTP headers passed to requests.get; the User-Agent is a desktop Chrome string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/119.0.0.0 Safari/537.36'
    ),
}


def parse(url):
    """Crawl a movie list page and follow its "next page" links.

    Starting at ``url``, each list page is downloaded, the movie entries on
    it are located, and the link labelled "下一页" (next page) is followed
    until a page has no such link or the link points at a page that was
    already visited. Every next-page URL is printed before it is fetched.

    Pagination is followed with a loop rather than recursion, so the number
    of pages is not limited by the interpreter's recursion depth.

    Args:
        url: Absolute URL of the first list page to crawl.

    Raises:
        requests.exceptions.RequestException: If a page cannot be fetched,
            including when the request times out.
    """
    from urllib.parse import urljoin

    # re.S lets ".*?" span line breaks between the height="26" marker and
    # the anchor that follows it.
    movie_rule = re.compile('height="26".*?<a href="(.*?)".*?>(.*?)<', re.S)
    # [^']* keeps the capture inside a single href value, so an earlier
    # anchor on the same line cannot be swallowed into the match.
    next_rule = re.compile("<a href='([^']*)'>下一页</a>")

    visited = set()
    page_url = url
    while True:
        visited.add(page_url)
        # A timeout stops a stalled server from hanging the crawl forever.
        res = requests.get(page_url, headers=headers, timeout=10)
        # NOTE(review): res.text uses the encoding requests infers; if the
        # "下一页" label never matches, check res.encoding for this site.

        for link, _title in movie_rule.findall(res.text):
            # Resolve the site-relative href against the current page.
            link = urljoin(page_url, link)
            # Detail scraping is currently switched off; pass `link` to
            # parseDetails here to enable it.

        next_links = next_rule.findall(res.text)
        if not next_links:
            return

        # Resolve against the page just fetched instead of a fixed directory,
        # so list URLs other than the default category also work.
        next_url = urljoin(page_url, next_links[0])
        if next_url in visited:
            # The site linked back to a page already crawled; stop here.
            return
        print(next_url)
        page_url = next_url


# Parse a movie detail page
def parseDetails(url):
    """Fetch a detail page and print the fields extracted from it.

    The page text is searched for three consecutive entries labelled
    "◎译…名" (translated name), "◎片…名" (film name) and "◎年…代" (year),
    each terminated by ``<br />``. Every match is printed as one line of
    three values.

    Args:
        url: Absolute URL of the detail page.

    Raises:
        requests.exceptions.RequestException: If the page cannot be fetched,
            including when the request times out.
    """
    # A timeout stops a stalled server from hanging the caller forever.
    res = requests.get(url, headers=headers, timeout=10)
    # \u3000 is the ideographic space that follows each label on the page;
    # re.S lets the lazy groups span line breaks.
    rule = re.compile('◎译.*?名\u3000(.*?)<br />◎片.*?名\u3000(.*?)<br />◎年.*?代\u3000(.*?)<br />', re.S)
    for name, title, year in rule.findall(res.text):
        print(name, title, year)


# Start the crawl from the module-level start URL, but only when this file is
# executed as a script (not when it is imported).
if __name__ == '__main__':
    parse(url)
